Normal Start Functions

Load isofiles

isofiles <-
  file.path(
    "Results",
    c(
      "180226_alkanes_C_GB",
      "180228_alkanes_C_GB"
    )
  ) %>%
  iso_read_continuous_flow()
## # A tibble: 12 x 4
##    file_id                  type  func           details                  
##    <chr>                    <chr> <chr>          <chr>                    
##  1 511__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  2 511__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  3 512__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  4 512__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  5 513__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  6 513__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  7 535__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
##  8 535__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
##  9 536__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 10 536__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…
## 11 537__Linearity CO2__.dxf error extract_dxf_r… cannot identify measured…
## 12 537__Linearity CO2__.dxf error extract_isoda… `peak`, `start`, `rt`, `…

Data

File Info

# all file info
isofiles %>% iso_get_file_info()
## Info: aggregating file info from 45 data file(s)

Vendor data table

isofiles %>% iso_get_vendor_data_table()
## Info: aggregating vendor data table without units from 45 data file(s)

Chromatograms

isofiles %>% 
  # fetch a few of the files of interst
  iso_filter_files(parse_number(Analysis) %in% c(543, 550)) %>% 
  # plot just mass 28
  iso_plot_continuous_flow_data(data = c(46)) %>% 
  # make interactive
  ggplotly(dynamicTicks = TRUE)
## Warning: You need the dev version of ggplot2 to use `dynamicTicks`

Analysis

Select relevant data

data_table <-
  isofiles %>%
  # filter the files you want to use 
  # --> exclude CO2 zeros
  iso_filter_files(!str_detect(`Identifier 1`, "CO2 zero")) %>% 
  # --> select only good analyses
  iso_filter_files(parse_number(Analysis) > 515) %>% 
  # get the vendor data table and file info
  iso_get_vendor_data_table(
    select = c(
      # peak info
      Nr., Start, Rt, End,
      # amplitudes and intensities
      `Ampl 44`, `Ampl 46`, `Intensity All`, `Intensity 44`,
      # ratios and deltas
      `R 46CO2/44CO2`, `d 13C/12C`
    ),
    include_file_info = c(
      file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`, `Seed Oxidation`
    )
  ) %>% 
  # rename some columns to be easier to work with
  rename(
    Ampl44=`Ampl 44`, Ampl46=`Ampl 46`, # amplitudes
    #Area28=`rIntensity 28`, Area29=`rIntensity 29`, # areas
    Intensity=`Intensity All`, #peak intensities
    R46C = `R 46CO2/44CO2`, d13C = `d 13C/12C`, # ratio and delta values
    rt = `Rt`,
    rt_start = `Start`,
    rt_end = `End`
    #File = `file_id`
  )
## Info: applying file filter, keeping 37 of 45 files
## Info: applying file filter, keeping 32 of 37 files
## Info: aggregating vendor data table without units from 32 data file(s), including file info 'c(file_datetime, Analysis, `Identifier 1`, `Identifier 2`, `GC Method`, 
##     `Seed Oxidation`)'
data_table

Map peaks

### CHANGE MAPPING FILE NAME ###
metadata_samples <- read_excel(file.path("metadata", "20180226_GB_metadata.xlsx"), sheet = "files")

metadata_peak_maps <- read_excel(file.path("metadata","20180226_GB_metadata.xlsx"), sheet = "maps")

metadata_samples
data_table_with_peaks <- data_table %>%
    iso_add_metadata(metadata_samples, match_by = c(`Identifier 1`, Analysis)) %>%
    iso_map_peaks(metadata_peak_maps) 
## Info: metadata added to 651 data rows, 0 left without metadata:
## - 1 metadata entries were mapped to 651 data entires via column 'Identifier 1'
## Info: 618 peaks in 29 files were successfully assigned, 33 could not be assigned, and 136 were missing
# missing and unidentified peaks
data_table_with_peaks %>% filter(!is_identified | is_missing)
# confirmed peaks
data_table_with_peaks <- data_table_with_peaks %>%
    filter(is_identified, !is_missing, !is_ambiguous)

data_table_with_peaks

Check stability of reference peaks

The second one is the one defined to be 0 permil (so will always be), the rest is relative to that peak.

p <- data_table_with_peaks %>% 
  filter(is_ref_peak == "yes") %>% 
  ggplot() + 
  aes(Nr., d13C, color = file_id) +
  geom_line() + 
  theme_bw()
ggplotly(p)

Add standard values

standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
data_w_stds <- data_table_with_peaks %>% 
  filter(type == "standard", is_ref_peak == "no") %>% 
  left_join(standards, by = "compound") %>% 
  mutate(is_std = !is.na(true.d13C) | !is.na(true.d2H))

Process data

Focus on the analytes and calculate a few summary parameters we want to use later.

data_w_analyte_peaks <- 
  data_table_with_peaks %>% 
  # this is important so that the reference peaks are not caught up in the next set of calculations
  filter(is_ref_peak == "no") %>% 
  # for each analysis calculate averages across analysis
  group_by(Analysis) %>% 
  mutate(
    ampl_sample_mean.mV = mean(`Ampl44`), ampl_sample_sd.mV = sd(`Ampl44`),
    area_sample_mean.Vs = mean(`Intensity 44`), area_sample_sd.Vs = sd(`Intensity 44`)
  )

Standards

standards <- read_excel(file.path("metadata", "gc_irms_indiana_A6.xlsx"))
kable(standards)
compound true.d2H true.d13C
C16 -9.1 -26.15
C17 -117.8 -31.88
C18 -52.0 -32.70
C19 -56.3 -31.99
C20 -89.7 -33.97
C21 -177.8 -28.83
C22 -81.3 -33.77
C23 -67.2 -33.37
C24 -29.7 -32.13
C25 -263.0 -28.46
C26 -45.9 -32.94
C27 -172.8 -30.49
C28 -36.8 -33.20
C29 -177.8 -29.10
C30 -213.6 -29.84
data_w_stds <-
  data_w_analyte_peaks %>% 
  iso_add_standards(standards)  
## Info: added 13 standard entries to 299 out of 299 rows

Visualize standards

v <- data_w_stds %>% 
  ggplot() +
  aes(x = true.d13C, y = d13C, color = file_id) + 
  geom_smooth(method = "lm", se = FALSE, alpha = 0.5) +
  geom_point() +
  theme_bw() +
  theme(legend.position = "none") 
ggplotly(v)
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Calibration

data_w_calibs <- data_w_stds %>% 
  # prepare for calibration by defining the grouping column(s) and setting default parameters
  iso_prepare_for_calibration(group_by = c(Analysis)) %>% 
  iso_set_default_process_parameters(delta_residual = resid_d13C) %>% 
  # run calibration
  iso_calibrate_delta(model = lm(`d13C` ~ true.d13C)) %>% 
  # pull out some columns we want generally available
  iso_unnest_calib_data(select = c(starts_with("Id"), ampl_sample_mean.mV)) 
## Info: preparing data for calibration by grouping based on 'Analysis' and nesting the grouped datasets into 'calibration_data'
## Info: calculating delta calibration fits based on 1 model ('lm(d13C ~ true.d13C)') for 26 data group(s) in 'calibration_data' with filter 'is_standard'; storing residuals in 'resid_d13C.'
data_w_calibs %>% 
  iso_unnest_delta_calib_summary(keep_other_list_data = FALSE) %>% 
  kable(digits =3)
Analysis Identifier 1 Identifier 2 ampl_sample_mean.mV r.squared adj.r.squared sigma statistic p.value df logLik AIC BIC deviance df.residual
517 A6 0.5uL 397.946 0.870 0.854 0.859 53.467 0.000 2 -11.551 29.102 30.010 5.900 8
518 A6 1.0uL 1028.965 0.853 0.839 0.956 63.715 0.000 2 -16.780 39.560 41.255 10.060 11
519 A6 2.0uL 2044.818 0.932 0.926 0.591 151.570 0.000 2 -10.522 27.044 28.739 3.841 11
520 A6 3uL 3075.854 0.978 0.976 0.323 485.383 0.000 2 -2.650 11.300 12.995 1.144 11
522 A6 0.5uL 446.709 0.914 0.902 0.716 74.560 0.000 2 -8.637 23.275 23.866 3.592 7
523 A6 1.0uL 1054.275 0.894 0.884 0.788 92.529 0.000 2 -14.265 34.530 36.225 6.832 11
524 A6 2.0uL 2062.263 0.929 0.922 0.613 143.730 0.000 2 -11.005 28.010 29.705 4.138 11
525 A6 3uL 3596.214 0.992 0.991 0.192 1324.835 0.000 2 4.092 -2.184 -0.489 0.406 11
526 A6 0.2uL 57.897 0.905 0.857 0.616 18.999 0.049 2 -2.353 10.707 8.866 0.760 2
527 A6 0.5uL 476.656 0.904 0.890 0.796 65.928 0.000 2 -9.589 25.179 25.770 4.438 7
528 A6 1.0uL 1144.098 0.920 0.913 0.657 127.265 0.000 2 -11.905 29.810 31.505 4.752 11
529 A6 2.0uL 2158.592 0.947 0.943 0.513 198.511 0.000 2 -8.686 23.373 25.068 2.896 11
530 A6 3uL 3608.378 0.995 0.995 0.142 2409.987 0.000 2 8.019 -10.038 -8.343 0.222 11
540 A6 0.5uL 423.750 0.886 0.870 0.849 54.634 0.000 2 -10.165 26.330 26.921 5.044 7
541 A6 0.5uL 412.138 0.899 0.884 0.822 62.157 0.000 2 -9.876 25.752 26.343 4.730 7
542 A6 0.5uL 413.941 0.872 0.856 0.941 54.685 0.000 2 -12.463 30.926 31.834 7.080 8
543 A6 0.5uL 405.228 0.751 0.723 1.492 27.121 0.001 2 -18.905 43.811 45.005 20.033 9
544 A6 0.5uL 441.497 0.867 0.851 0.922 52.288 0.000 2 -12.261 30.522 31.430 6.800 8
545 A6 0.5uL 413.272 0.832 0.817 1.085 54.406 0.000 2 -18.420 42.840 44.535 12.948 11
546 A6 0.5uL 445.801 0.853 0.834 0.967 46.318 0.000 2 -12.734 31.469 32.377 7.475 8
547 A6 1.0uL 1161.397 0.879 0.868 0.861 79.857 0.000 2 -15.411 36.821 38.516 8.149 11
548 A6 1.0uL 1162.422 0.865 0.852 0.894 70.351 0.000 2 -15.901 37.802 39.497 8.788 11
549 A6 1.0uL 1219.784 0.898 0.888 0.757 96.471 0.000 2 -13.733 33.465 35.160 6.295 11
550 A6 1.0uL 1288.446 0.889 0.879 0.787 88.190 0.000 2 -14.252 34.504 36.199 6.819 11
551 A6 1.0uL 1292.487 0.915 0.907 0.685 117.820 0.000 2 -12.433 30.865 32.560 5.154 11
552 A6 1.0uL 1381.540 0.898 0.888 0.754 96.335 0.000 2 -13.695 33.389 35.084 6.259 11
data_w_calibs %>% 
  # pull out seed oxidation in addition
  iso_unnest_calib_data(select = `Seed Oxidation`) %>% 
  iso_unnest_delta_calib_coefs(select = c(-statistic), keep_other_list_data = FALSE) %>% 
  # arrange by term and Analysis to get a quick idea of what the numbers across analyses
  arrange(term, Analysis) %>% 
  kable(digits = 2)
Analysis Identifier 1 Identifier 2 ampl_sample_mean.mV Seed Oxidation term estimate std.error p.value signif
517 A6 0.5uL 397.95 1 (Intercept) 11.08 4.46 0.04 *
518 A6 1.0uL 1028.96 1 (Intercept) 13.21 4.39 0.01 *
519 A6 2.0uL 2044.82 1 (Intercept) 11.91 2.71 0.00 **
520 A6 3uL 3075.85 1 (Intercept) 11.22 1.48 0.00 ***
522 A6 0.5uL 446.71 1 (Intercept) 13.68 4.09 0.01 *
523 A6 1.0uL 1054.27 1 (Intercept) 13.10 3.62 0.00 **
524 A6 2.0uL 2062.26 1 (Intercept) 12.23 2.82 0.00 **
525 A6 3uL 3596.21 1 (Intercept) 10.72 0.88 0.00 ***
526 A6 0.2uL 57.90 1 (Intercept) 5.98 6.11 0.43
527 A6 0.5uL 476.66 1 (Intercept) 15.45 4.54 0.01 *
528 A6 1.0uL 1144.10 1 (Intercept) 12.47 3.02 0.00 **
529 A6 2.0uL 2158.59 1 (Intercept) 11.81 2.36 0.00 ***
530 A6 3uL 3608.38 1 (Intercept) 10.74 0.65 0.00 ***
540 A6 0.5uL 423.75 1 (Intercept) 14.20 4.84 0.02 *
541 A6 0.5uL 412.14 1 (Intercept) 15.43 4.69 0.01 *
542 A6 0.5uL 413.94 1 (Intercept) 14.58 4.89 0.02 *
543 A6 0.5uL 405.23 1 (Intercept) 17.35 7.54 0.05 *
544 A6 0.5uL 441.50 1 (Intercept) 13.18 4.79 0.02 *
545 A6 0.5uL 413.27 1 (Intercept) 14.54 4.98 0.01 *
546 A6 0.5uL 445.80 1 (Intercept) 12.79 5.02 0.03 *
547 A6 1.0uL 1161.40 1 (Intercept) 13.68 3.95 0.01 **
548 A6 1.0uL 1162.42 1 (Intercept) 12.93 4.10 0.01 **
549 A6 1.0uL 1219.78 1 (Intercept) 12.63 3.47 0.00 **
550 A6 1.0uL 1288.45 1 (Intercept) 12.45 3.61 0.01 **
551 A6 1.0uL 1292.49 1 (Intercept) 12.67 3.14 0.00 **
552 A6 1.0uL 1381.54 1 (Intercept) 12.48 3.46 0.00 **
517 A6 0.5uL 397.95 1 true.d13C 1.05 0.14 0.00 ***
518 A6 1.0uL 1028.96 1 true.d13C 1.11 0.14 0.00 ***
519 A6 2.0uL 2044.82 1 true.d13C 1.05 0.09 0.00 ***
520 A6 3uL 3075.85 1 true.d13C 1.03 0.05 0.00 ***
522 A6 0.5uL 446.71 1 true.d13C 1.12 0.13 0.00 ***
523 A6 1.0uL 1054.27 1 true.d13C 1.10 0.11 0.00 ***
524 A6 2.0uL 2062.26 1 true.d13C 1.07 0.09 0.00 ***
525 A6 3uL 3596.21 1 true.d13C 1.01 0.03 0.00 ***
526 A6 0.2uL 57.90 1 true.d13C 0.87 0.20 0.05 *
527 A6 0.5uL 476.66 1 true.d13C 1.18 0.14 0.00 ***
528 A6 1.0uL 1144.10 1 true.d13C 1.08 0.10 0.00 ***
529 A6 2.0uL 2158.59 1 true.d13C 1.05 0.07 0.00 ***
530 A6 3uL 3608.38 1 true.d13C 1.01 0.02 0.00 ***
540 A6 0.5uL 423.75 1 true.d13C 1.14 0.15 0.00 ***
541 A6 0.5uL 412.14 1 true.d13C 1.18 0.15 0.00 ***
542 A6 0.5uL 413.94 1 true.d13C 1.16 0.16 0.00 ***
543 A6 0.5uL 405.23 1 true.d13C 1.26 0.24 0.00 ***
544 A6 0.5uL 441.50 1 true.d13C 1.11 0.15 0.00 ***
545 A6 0.5uL 413.27 1 true.d13C 1.16 0.16 0.00 ***
546 A6 0.5uL 445.80 1 true.d13C 1.10 0.16 0.00 ***
547 A6 1.0uL 1161.40 1 true.d13C 1.12 0.12 0.00 ***
548 A6 1.0uL 1162.42 1 true.d13C 1.09 0.13 0.00 ***
549 A6 1.0uL 1219.78 1 true.d13C 1.08 0.11 0.00 ***
550 A6 1.0uL 1288.45 1 true.d13C 1.07 0.11 0.00 ***
551 A6 1.0uL 1292.49 1 true.d13C 1.08 0.10 0.00 ***
552 A6 1.0uL 1381.54 1 true.d13C 1.07 0.11 0.00 ***

Parameters

data_params <- data_w_calibs %>% 
  # pull out remaining columns we want available (some might already be pulled out but that's okay)
  #Note: Garrett changed "Preparation" to "Seed Oxidation" - we may want a different shape variable
  iso_unnest_calib_data(select = c(file_datetime, ampl_sample_mean.mV, `Seed Oxidation`, is_standard)) 
  
# visualize the delta calibration fits
data_params %>% 
  #NEED TO FILTER OUT SAMPLES iso_filter_files(`Identifier 1` == "A5") 
  iso_visualize_delta_calib_fits(x = Analysis, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. analysis")

data_params %>% 
  iso_visualize_delta_calib_fits(x = ampl_sample_mean.mV, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. amplitude") 

data_params %>% 
  iso_visualize_delta_calib_fits(x = file_datetime, color = `Identifier 2`, shape = `Seed Oxidation`, size = ampl_sample_mean.mV,
                                 include_from_summary = c(adj.r.squared, deviance)) + labs(title = "parameters vs. time")

# turn the last plot into an interactive one
ggplotly(ggplot2::last_plot() + theme(legend.position = "none"))
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`

Compounds

data_w_calibs %>% 
  # pull out relevant data
  iso_unnest_calib_data(select = everything()) %>% 
  filter(is_standard) %>% 
  # calculate deviation from means
  group_by(Analysis) %>% 
  mutate(
    `Var: residual d13C [permil]` = resid_d13C,
    `Var: area diff from mean [%]` = (`Intensity 44`/mean(`Intensity 44`) - 1) * 100,
    `Var: amplitude diff from mean [%]` = (`Ampl44`/mean(`Ampl44`) - 1) * 100
  ) %>%
  # visualize
  iso_visualize_data(x = compound, y = starts_with("Var"), group = Analysis, color = `Identifier 2`)

ggplotly(ggplot2::last_plot())
## We recommend that you use the dev version of ggplot2 with `ggplotly()`
## Install it with: `devtools::install_github('hadley/ggplot2')`